1 Setup

## load relevant libraries and functions
## load relevant libraries and functions
library(knitr)         # for knitting (library() errors on failure; require() does not)
library(Hmisc)         # for descriptives
library(psych)
library(png)           # for working with images
library(grid)
library(ggplotify)     # for plotting
library(patchwork)     # for combining plots
library(scales)
library(lme4)          # for mixed effects models
library(ggeffects)
library(broom.mixed)
library(emmeans)
library(generics)
library(modelr)
library(tidyverse)     # for everything else

## set default code chunk options
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

## set default plot theme and colors
theme_set(theme_classic(base_size = 18))

## fix print width for knitted doc
options(width = 70)

## suppress warnings about grouping
options(dplyr.summarise.inform = FALSE)
options(xtable.floating = FALSE)
options(xtable.timestamp = "")

## set random seed (for reproducibility, e.g., of bootstrapped CIs)
set.seed(1)

## set directories for plots and data outputs
figures_dir <- '../figures/'
data_dir <- '../data/'

2 Phase 1: Exploring and visualizing the data

2.1 Load and examine the data

# Import data from https://science-of-learning-datasci.s3.us-west-2.amazonaws.com/mini-project2/coursekata-23-split80.zip

# item-level responses (one row per submitted response)
df.responses <-
  read.csv(paste0(data_dir,
                  "filtered80_responses_2023.csv"))

# end-of-chapter scores (one row per student x chapter)
df.filtered_responses <-
  read.csv(paste0(data_dir,
                  "filtered80_eoc_2023.csv"))

# run quick data summary
skimr::skim(df.responses)
Table 2.1: Data summary
Name df.responses
Number of rows 381091
Number of columns 49
_______________________
Column type frequency:
character 35
logical 5
numeric 9
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
institution_id 0 1.00 36 36 0 1 0
class_id 0 1.00 36 36 0 5 0
course_name 0 1.00 25 25 0 1 0
release 0 1.00 4 9 0 5 0
book 0 1.00 43 53 0 2 0
student_id 0 1.00 36 36 0 560 0
item_id 0 1.00 5 25 0 484 0
item_type 0 1.00 4 10 0 2 0
chapter 0 1.00 26 59 0 9 0
page 0 1.00 11 70 0 87 0
response 203653 0.47 0 399 1 2183 0
prompt 0 1.00 0 1667 1578 647 0
dt_submitted 0 1.00 21 26 0 290124 0
completes_page 0 1.00 4 5 0 2 0
user_agent 0 1.00 0 184 124985 168 0
lrn_session_id 0 1.00 0 36 124985 166871 0
lrn_response_id 0 1.00 0 69 124985 256107 0
lrn_activity_reference 0 1.00 0 36 124985 403 0
lrn_question_reference 0 1.00 0 36 124985 502 0
lrn_type 0 1.00 0 21 124985 10 0
lrn_dt_started 0 1.00 0 23 124985 128867 0
lrn_dt_saved 0 1.00 0 23 124985 163870 0
lrn_status 0 1.00 0 9 124985 2 0
lrn_response_json 0 1.00 0 2354 124985 256107 0
lrn_option_0 0 1.00 0 191 228678 227 0
lrn_option_1 0 1.00 0 186 228678 230 0
lrn_option_2 0 1.00 0 188 252958 201 0
lrn_option_3 0 1.00 0 188 287799 147 0
lrn_option_4 0 1.00 0 142 342344 50 0
lrn_option_5 0 1.00 0 150 377421 7 0
lrn_option_6 0 1.00 0 19 380048 3 0
lrn_option_7 0 1.00 0 19 380048 3 0
dt_submitted_processed 0 1.00 20 20 0 282787 0
lrn_dt_started_processed 124985 0.67 20 20 0 128866 0
lrn_dt_saved_processed 124985 0.67 20 20 0 163869 0

Variable type: logical

skim_variable n_missing complete_rate mean count
lrn_option_9 381091 0 NaN :
lrn_option_10 381091 0 NaN :
lrn_option_11 381091 0 NaN :
lrn_dt_started_processed_ms 381091 0 NaN :
lrn_dt_saved_processed_ms 381091 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
lms_id 0 1.00 5.709242e+04 2.723007e+04 2.190500e+04 3.030300e+04 5.193200e+04 8.673300e+04 1.128510e+05 ▇▅▁▆▂
points_possible 90388 0.76 1.000000e+00 0.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 ▁▁▇▁▁
points_earned 90919 0.76 6.900000e-01 4.600000e-01 0.000000e+00 0.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 ▃▁▁▁▇
attempt 0 1.00 1.280000e+00 1.440000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 6.600000e+01 ▇▁▁▁▁
lrn_question_position 124985 0.67 1.670000e+00 1.330000e+00 1.000000e+00 1.000000e+00 1.000000e+00 2.000000e+00 1.000000e+01 ▇▁▁▁▁
lrn_option_8 380579 0.00 7.100000e+01 0.000000e+00 7.100000e+01 7.100000e+01 7.100000e+01 7.100000e+01 7.100000e+01 ▁▁▇▁▁
dt_submitted_processed_ms 0 1.00 1.679858e+12 4.920459e+09 1.671239e+12 1.674963e+12 1.681055e+12 1.682923e+12 1.691129e+12 ▆▅▇▂▃
chapter_num 0 1.00 5.040000e+00 2.400000e+00 1.000000e+00 3.000000e+00 5.000000e+00 7.000000e+00 9.000000e+00 ▅▇▂▅▅
page_num 0 1.00 5.620000e+00 3.030000e+00 1.000000e+00 3.000000e+00 5.000000e+00 8.000000e+00 1.400000e+01 ▇▇▆▃▁
# run quick data summary on the end-of-chapter scores
skimr::skim(df.filtered_responses)
Table 2.1: Data summary
Name df.filtered_responses
Number of rows 4752
Number of columns 8
_______________________
Column type frequency:
character 5
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
book 0 1 43 53 0 2 0
release 0 1 4 9 0 5 0
institution_id 0 1 36 36 0 1 0
class_id 0 1 36 36 0 5 0
student_id 0 1 36 36 0 560 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1 2375.50 1371.93 0 1187.75 2375.50 3563.25 4751 ▇▇▇▇▇
chapter 0 1 4.95 2.59 1 3.00 5.00 7.00 9 ▇▇▃▇▇
score 0 1 0.73 0.20 0 0.62 0.77 0.88 1 ▁▁▃▆▇

2.2 Data Wrangling

# Reduce response-level data: parse timestamps, keep last attempt per item,
# and retain only the columns used downstream.
df.reduced <-
  df.responses %>%
  # calculate time spent on question (in mins)
  mutate(dt_submitted = ymd_hms(dt_submitted),
         lrn_dt_started = ymd_hms(lrn_dt_started),
         time_spent = as.numeric(difftime(dt_submitted,
                                          lrn_dt_started,
                                          units = "mins")),
         # make "completes_page" boolean
         completes_page = completes_page == "true") %>%
  group_by(book, release,
           institution_id, class_id, student_id,
           chapter_num, item_id) %>%
  # count only last attempt
  slice_max(order_by = attempt,
            n = 1,
            with_ties = FALSE) %>%
  ungroup() %>%
  # keep only relevant cols
  select(c(book, release,
           institution_id, class_id,
           student_id,
           chapter = chapter_num, item_id,
           time_spent, completes_page,
           points_possible, points_earned, attempt))

# sanity check: expect exactly one row per student x item
df.reduced %>%
  count(student_id, item_id) # should all be 1
## # A tibble: 248,227 × 3
##    student_id                           item_id                    n
##    <chr>                                <chr>                  <int>
##  1 001824fb-a2fd-431d-aef6-7a1250d97a62 A4_Code_Shuffling_01       1
##  2 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Empty_01           1
##  3 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Empty_02           1
##  4 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Median_01          1
##  5 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Median_02          1
##  6 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Median_03          1
##  7 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Median_04          1
##  8 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Median_05          1
##  9 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Predictions_01     1
## 10 001824fb-a2fd-431d-aef6-7a1250d97a62 B1_Code_Predictions_02     1
## # ℹ 248,217 more rows
# print top rows of the reduced data (one row per student x item)
head(df.reduced, n = 10)
## # A tibble: 10 × 12
##    book     release institution_id class_id student_id chapter item_id
##    <chr>    <chr>   <chr>          <chr>    <chr>        <int> <chr>  
##  1 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 Embedd…
##  2 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-0  
##  3 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-1  
##  4 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-10 
##  5 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-11 
##  6 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-12 
##  7 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-13 
##  8 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-14 
##  9 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-15 
## 10 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-16 
## # ℹ 5 more variables: time_spent <dbl>, completes_page <lgl>,
## #   points_possible <int>, points_earned <int>, attempt <int>
# Aggregate to one row per student x chapter: % correct, engagement metrics,
# and the student's score on the preceding chapter.
df.summary <-
  df.reduced %>%
  # group % correct by book, release, institution, class, student, and chapter
  group_by(book, release,
           institution_id, class_id, student_id,
           chapter) %>%
  # calculate % correct across chapter
  summarise(score = mean(points_earned, na.rm = TRUE),
            # calculate % of pages fully completed across chapter
            chapter_pages_completed = mean(completes_page, na.rm = TRUE),
            # calculate time spent across chapter
            time_spent = mean(time_spent, na.rm = TRUE),
            # calculate average number of attempts across chapter
            no_attempts = mean(attempt, na.rm = TRUE),
            .groups = "drop_last") %>%
  arrange(book, release,
          institution_id, class_id, student_id,
          chapter) %>%
  group_by(book, release, institution_id, class_id, student_id) %>%
  # add column for % correct on preceding chapter
  mutate(score_prev_chapter = lag(score)) %>%   # NA for the first chapter
  ungroup()

# sanity check: expect exactly one row per student x chapter
df.summary %>%
  count(student_id, chapter) # should all be 1
## # A tibble: 4,841 × 3
##    student_id                           chapter     n
##    <chr>                                  <int> <int>
##  1 001824fb-a2fd-431d-aef6-7a1250d97a62       1     1
##  2 001824fb-a2fd-431d-aef6-7a1250d97a62       2     1
##  3 001824fb-a2fd-431d-aef6-7a1250d97a62       3     1
##  4 001824fb-a2fd-431d-aef6-7a1250d97a62       4     1
##  5 001824fb-a2fd-431d-aef6-7a1250d97a62       5     1
##  6 001824fb-a2fd-431d-aef6-7a1250d97a62       6     1
##  7 001824fb-a2fd-431d-aef6-7a1250d97a62       7     1
##  8 001824fb-a2fd-431d-aef6-7a1250d97a62       8     1
##  9 001824fb-a2fd-431d-aef6-7a1250d97a62       9     1
## 10 0096491a-77bf-4e79-b906-fc7a8e5e57a6       1     1
## # ℹ 4,831 more rows
# print top rows of the chapter-level summary
# (the original printed df.reduced again, duplicating the earlier chunk;
#  this section builds df.summary, so that is what we preview here)
head(df.summary, n = 10)
## # A tibble: 10 × 12
##    book     release institution_id class_id student_id chapter item_id
##    <chr>    <chr>   <chr>          <chr>    <chr>        <int> <chr>  
##  1 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 Embedd…
##  2 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-0  
##  3 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-1  
##  4 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-10 
##  5 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-11 
##  6 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-12 
##  7 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-13 
##  8 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-14 
##  9 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-15 
## 10 College… v5.0    292cff87-3c74… b7489f4… 0096491a-…       1 ch1-16 
## # ℹ 5 more variables: time_spent <dbl>, completes_page <lgl>,
## #   points_possible <int>, points_earned <int>, attempt <int>

2.3 Preliminary Item Difficulty Analysis: % correct

We have a total of 9 chapters, with data from N = 570 students. Not all students answered all questions in all chapters, so we get a range of N = 526 to 562. The proportion of correct responses ranges from 71.4% to 97.8% and seems to decline with chapter number (i.e., more advanced chapters are harder). The SD ranges from 5% to 17%, also increasing with chapter number (i.e., there is more variability in more advanced chapters).

# Descriptive stats: for each chapter, compute % correct (mean), SD, and SE
df.descriptives <-
  df.summary %>%
  group_by(chapter) %>%
  # compute mean
  dplyr::summarise(mean_correct = mean(score,
                                       na.rm = TRUE),
                   # compute SD
                   sd_correct = sd(score,
                                   na.rm = TRUE),
                   # compute number of responses
                   n = n(),
                   # compute SE (SD / sqrt of n)
                   se_correct = sd_correct / sqrt(n)) %>%
  ungroup()

# print (View() is interactive-only and renders nothing when knitting)
df.descriptives

Below, we evaluate means and SDs by chapter and book:

# Descriptive stats: for each chapter *and book*, compute % correct (mean), SD, and SE
df.descriptives2 <-
  df.summary %>%
  group_by(chapter, book) %>%
  # compute mean
  dplyr::summarise(mean_correct = mean(score,
                                       na.rm = TRUE),
                   # compute SD
                   sd_correct = sd(score,
                                   na.rm = TRUE),
                   # compute number of responses
                   n = n(),
                   # compute SE (SD / sqrt of n)
                   se_correct = sd_correct / sqrt(n)) %>%
  ungroup()

# print (View() is interactive-only and renders nothing when knitting)
df.descriptives2

2.3.1 Viz 1: % correct by chapter and book

There are 9 chapters and 2 book versions. The plot below shows that % correct seems to decline with chapter number (i.e., more advanced chapters are harder), but does not vary much by book. We also see increasing variance with increasing chapter number (i.e., there is more variability in more advanced chapters).

# Plot % correct by chapter, with individual students (colored by book)
# and chapter means with bootstrapped 95% CIs.
plot.pct_correct <-
  df.summary %>%
  mutate(chapter = factor(chapter)) %>%
  # chapter on x-axis, % correct on y-axis
  # (note: the original also mapped color = chapter here, but every layer
  #  overrides the color aesthetic, so that mapping was dead code)
  ggplot(aes(x = chapter,
             y = score * 100)) +
  # dot plot for individual participants
  geom_point(aes(color = book),
             position = position_jitter(width = .1,
                                        height = 0),
             alpha = .3,
             size = 1) +
  # line plot connecting dots across chapters
  geom_line(aes(group = student_id, color = book),
            linewidth = .2,
            alpha = .03) +
  # plot chapter means with bootstrapped 95% CIs
  stat_summary(fun.data = mean_cl_boot,
               geom     = "errorbar",
               width    = 0.1,
               linewidth = 1,
               color    = "black") +
  stat_summary(fun = mean,
               geom = "point",
               shape = 18,
               size  = 3,
               color = "black") +
  # add title and axis labels
  labs(title = "% correct by Chapter & Book",
       x = "Chapter",
       y = "% correct")

plot.pct_correct

# export plot (pass the plot explicitly rather than relying on last_plot())
ggsave(paste0(figures_dir, "pct_correct_bychapter.png"),
       plot = plot.pct_correct,
       width = 20, height = 10, device = "png")

2.3.2 Viz 2: Engagement across Chapters (Engagement over Time)

We computed 3 engagement metrics: (1) how many chapter pages were fully completed in a given chapter, (2) how many attempts student took, on average, across questions within a chapter, and (3), how long students took, on average, to answer questions within a chapter.

Not all students answered all questions in all chapters, so we get a range of N = 526 to 562 data points.

The proportion of full pages completed ranged from 12.6% to 27.6% and fluctuated across chapters. The first chapter had the most variance.

The average amount of time spent ranged from 9 to 14.2 minutes. The SD was quite large (12-26 minutes).

The mean number of attempts was consistently close to 1, but ranged from 1.08 to 1.23. It was highest for the first chapter. SDs ranged from .08 to .34.

# Descriptive stats: for each chapter, compute mean and SD of each
# engagement metric (pages completed, time spent, number of attempts).
# Fixes a bug in the original: sd_pages was defined twice, and the second
# definition (sd of time_spent) silently overwrote the pages SD.
df.descriptives_engagement <-
  df.summary %>%
  group_by(chapter) %>%
  dplyr::summarise(n = n(),
                   # mean % of pages fully completed
                   mean_pages = mean(chapter_pages_completed,
                                     na.rm = TRUE),
                   # SD of % of pages fully completed
                   sd_pages = sd(chapter_pages_completed,
                                 na.rm = TRUE),
                   # mean time spent
                   mean_time = mean(time_spent,
                                    na.rm = TRUE),
                   # SD of time spent
                   sd_time = sd(time_spent,
                                na.rm = TRUE),
                   # mean number of attempts
                   mean_attempts = mean(no_attempts,
                                        na.rm = TRUE),
                   # SD of number of attempts
                   sd_attempts = sd(no_attempts,
                                    na.rm = TRUE)) %>%
  ungroup()

# print (View() is interactive-only and renders nothing when knitting)
df.descriptives_engagement

The figure below visualizes this:

# % of chapter pages fully completed, by chapter
plot.pages <-
  df.summary %>%
  ggplot(aes(x = as.factor(chapter),
             y = 100 * chapter_pages_completed)) +
  # bootstrapped 95% CI error bars around chapter means
  stat_summary(fun.data = mean_cl_boot,
               geom = "errorbar",
               color = "black",
               width = 0.1,
               linewidth = 1) +
  # diamond markers at chapter means
  stat_summary(fun = mean,
               geom = "point",
               color = "black",
               shape = 18,
               size = 3) +
  # jittered points for individual participants, drawn on top
  geom_point(position = position_jitter(width = 0.3,
                                        height = 0),
             color = "purple",
             alpha = 0.1,
             size = 0.5) +
  labs(title = "Average # of pages completed within chapter",
       x = "Chapter",
       y = "Mean % of chapter pages fully completed")

# average number of attempts across questions, by chapter
plot.attempts <-
  df.summary %>%
  ggplot(aes(x = as.factor(chapter),
             y = no_attempts)) +
  # bootstrapped 95% CI error bars around chapter means
  stat_summary(fun.data = mean_cl_boot,
               geom = "errorbar",
               color = "black",
               width = 0.1,
               linewidth = 1) +
  # diamond markers at chapter means
  stat_summary(fun = mean,
               geom = "point",
               color = "black",
               shape = 18,
               size = 3) +
  # jittered points for individual participants, drawn on top
  geom_point(position = position_jitter(width = 0.3,
                                        height = 0),
             color = "purple",
             alpha = 0.1,
             size = 0.5) +
  labs(title = "Average # of attempts across questions within chapter",
       x = "Chapter",
       y = "Mean # of attempts across questions")

# average time spent across questions, by chapter
plot.time <-
  df.summary %>%
  ggplot(aes(x = as.factor(chapter),
             y = time_spent)) +
  # bootstrapped 95% CI error bars around chapter means
  stat_summary(fun.data = mean_cl_boot,
               geom = "errorbar",
               color = "black",
               width = 0.1,
               linewidth = 1) +
  # diamond markers at chapter means
  stat_summary(fun = mean,
               geom = "point",
               color = "black",
               shape = 18,
               size = 3) +
  # jittered points for individual participants, drawn on top
  geom_point(position = position_jitter(width = 0.3,
                                        height = 0),
             color = "purple",
             alpha = 0.1,
             size = 0.5) +
  labs(title = "Mean time spent across questions within chapter",
       x = "Chapter",
       y = "Mean time spent across questions (mins)")

# combine the three engagement plots into one figure
plot.engagement <-
  plot.pages + plot.attempts + plot.time +
  plot_layout(guides = "collect") +
  plot_annotation(title = "Student engagement metrics by chapter")

plot.engagement

# export plot (pass the plot explicitly rather than relying on last_plot())
ggsave(paste0(figures_dir, "engagement_bychapter.png"),
       plot = plot.engagement,
       width = 25, height = 10, device = "png")

3 Phase 2: Defining & evaluating statistical models

Next, we examine what predicts average student performance across chapters.

3.1 RQ1: Does % correct decrease with advancement of content?

# Build response-level modeling data: join each student's previous-chapter
# score onto df.reduced and keep only model variables.
df.compare <-
  df.reduced %>%
  left_join(df.summary %>%
              select(student_id, chapter, score_prev_chapter),
            by = c("student_id", "chapter")) %>%
  select(points_earned, chapter, book,
         attempt, score_prev_chapter,
         class_id, student_id) %>%
  mutate(across(c(class_id, book, student_id),
                factor)) %>%
  # create binary value marking missing values for prev chapter scores
  mutate(prev_missing = if_else(is.na(score_prev_chapter), 1, 0),
         # fill first chapter values with 0
         score_prev_chapter = replace_na(score_prev_chapter, 0)) %>%
  drop_na()

# quick check of correlation between score and chapter
df.summary %>%
  summarise(r = cor(score, chapter, use = "pair"))
## # A tibble: 1 × 1
##        r
##    <dbl>
## 1 -0.456
# mixed effects logistic regression: correctness ~ chapter + book
lm.model1 <-
  glmer(points_earned ~
          1 +
          # fixed effect for chapter and book
          chapter +
          book +
          # random intercept for class and student
          (1 | class_id) +
          (1 | student_id),
        family = binomial(link = "logit"),
        data = df.compare)

# summary
lm.model1 %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## points_earned ~ 1 + chapter + book + (1 | class_id) + (1 | student_id)
##    Data: df.compare
## 
##      AIC      BIC   logLik deviance df.resid 
## 179294.8 179345.6 -89642.4 179284.8   193307 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.3489  0.2548  0.3856  0.5166  2.2969 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  student_id (Intercept) 0.436610 0.66076 
##  class_id   (Intercept) 0.005672 0.07531 
## Number of obs: 193312, groups:  student_id, 560; class_id, 5
## 
## Fixed effects:
##                                                  Estimate Std. Error
## (Intercept)                                      2.326583   0.062186
## chapter                                         -0.168990   0.002396
## bookCollege / Statistics and Data Science (ABC)  0.094735   0.092010
##                                                 z value Pr(>|z|)    
## (Intercept)                                       37.41   <2e-16 ***
## chapter                                          -70.55   <2e-16 ***
## bookCollege / Statistics and Data Science (ABC)    1.03    0.303    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) chaptr
## chapter     -0.206       
## bC/SaDS(ABC -0.637 -0.009
# p values: joint (chi-square) tests for each fixed effect
lm.model1 %>%
  joint_tests()
##  model term df1 df2  F.ratio    Chisq p.value
##  chapter      1 Inf 4976.555 4976.555  <.0001
##  book         1 Inf    1.060    1.060  0.3032
# Calculate model-based predicted probabilities for each fixed effect
lm.model1 %>%
  ggpredict(bias_correction = TRUE)
## $chapter
## # Predicted probabilities of points_earned
## 
## chapter | Predicted |     95% CI
## --------------------------------
##       1 |      0.78 | 0.60, 0.64
##       2 |      0.75 | 0.58, 0.61
##       3 |      0.72 | 0.56, 0.59
##       4 |      0.69 | 0.54, 0.57
##       5 |      0.66 | 0.53, 0.55
##       6 |      0.63 | 0.52, 0.54
##       7 |      0.60 | 0.51, 0.53
##       9 |      0.56 | 0.50, 0.52
## 
## Adjusted for:
## *       book = College / Advanced Statistics and Data Science (ABCD)
## *   class_id =                                  0 (population-level)
## * student_id =                                  0 (population-level)
## 
## $book
## # Predicted probabilities of points_earned
## 
## book                                                  | Predicted |     95% CI
## ------------------------------------------------------------------------------
## College / Advanced Statistics and Data Science (ABCD) |      0.66 | 0.53, 0.55
## College / Statistics and Data Science (ABC)           |      0.68 | 0.54, 0.56
## 
## Adjusted for:
## *    chapter = 5.00
## *   class_id = 0 (population-level)
## * student_id = 0 (population-level)
## 
## attr(,"class")
## [1] "ggalleffects" "list"        
## attr(,"model.name")
## [1] "."

3.2 RQ2: Does student engagement predict % correct?

Including only number of attempts as an engagement predictor because the model does not converge using the other predictors.

# quick check of pairwise correlations among score, chapter, and engagement metrics
df.summary %>% 
  select(score, chapter,
         time_spent, no_attempts, chapter_pages_completed) %>% 
  cor(use = "pairwise.complete.obs")
##                               score    chapter   time_spent
## score                    1.00000000 -0.4557170  0.086554242
## chapter                 -0.45571704  1.0000000 -0.041677097
## time_spent               0.08655424 -0.0416771  1.000000000
## no_attempts             -0.02824059 -0.1815077  0.034590356
## chapter_pages_completed -0.18337909  0.3655008 -0.005932573
##                         no_attempts chapter_pages_completed
## score                   -0.02824059            -0.183379092
## chapter                 -0.18150775             0.365500846
## time_spent               0.03459036            -0.005932573
## no_attempts              1.00000000            -0.111370362
## chapter_pages_completed -0.11137036             1.000000000
# mixed effects logistic regression: add number of attempts as engagement predictor
lm.model2 <-
  glmer(points_earned ~
          1 +
          # fixed effect for chapter, book
          chapter +
          book +
          # fixed effects for student engagement variable (only attempt)
          attempt +
          # random intercept for class and student
          (1 | class_id) +
          (1 | student_id),
        family = binomial(link = "logit"),
        data = df.compare)

# summary
lm.model2 %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## points_earned ~ 1 + chapter + book + attempt + (1 | class_id) +  
##     (1 | student_id)
##    Data: df.compare
## 
##      AIC      BIC   logLik deviance df.resid 
## 179250.0 179311.1 -89619.0 179238.0   193306 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.3726  0.2546  0.3854  0.5167  2.2897 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  student_id (Intercept) 0.434013 0.65880 
##  class_id   (Intercept) 0.005763 0.07592 
## Number of obs: 193312, groups:  student_id, 560; class_id, 5
## 
## Fixed effects:
##                                                  Estimate Std. Error
## (Intercept)                                      2.383142   0.063186
## chapter                                         -0.170192   0.002404
## bookCollege / Statistics and Data Science (ABC)  0.093061   0.092968
## attempt                                         -0.040946   0.005894
##                                                 z value Pr(>|z|)    
## (Intercept)                                      37.716  < 2e-16 ***
## chapter                                         -70.810  < 2e-16 ***
## bookCollege / Statistics and Data Science (ABC)   1.001    0.317    
## attempt                                          -6.947 3.73e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) chaptr b/SaDS(
## chapter     -0.214               
## bC/SaDS(ABC -0.635 -0.008        
## attempt     -0.131  0.077  0.003
# p values: joint (chi-square) tests for each fixed effect
lm.model2 %>%
  joint_tests()
##  model term df1 df2  F.ratio    Chisq p.value
##  chapter      1 Inf 5014.042 5014.042  <.0001
##  book         1 Inf    1.002    1.002  0.3168
##  attempt      1 Inf   48.264   48.264  <.0001
# Calculate model-based predicted probabilities for each fixed effect
lm.model2 %>%
  ggpredict(bias_correction = TRUE)
## $chapter
## # Predicted probabilities of points_earned
## 
## chapter | Predicted |     95% CI
## --------------------------------
##       1 |      0.78 | 0.60, 0.64
##       2 |      0.75 | 0.58, 0.61
##       3 |      0.72 | 0.56, 0.59
##       4 |      0.69 | 0.54, 0.57
##       5 |      0.66 | 0.53, 0.55
##       6 |      0.63 | 0.52, 0.54
##       7 |      0.60 | 0.51, 0.53
##       9 |      0.56 | 0.51, 0.52
## 
## Adjusted for:
## *       book = College / Advanced Statistics and Data Science (ABCD)
## *    attempt =                                                  1.00
## *   class_id =                                  0 (population-level)
## * student_id =                                  0 (population-level)
## 
## $book
## # Predicted probabilities of points_earned
## 
## book                                                  | Predicted |     95% CI
## ------------------------------------------------------------------------------
## College / Advanced Statistics and Data Science (ABCD) |      0.66 | 0.53, 0.55
## College / Statistics and Data Science (ABC)           |      0.68 | 0.54, 0.57
## 
## Adjusted for:
## *    chapter = 5.00
## *    attempt = 1.00
## *   class_id = 0 (population-level)
## * student_id = 0 (population-level)
## 
## $attempt
## # Predicted probabilities of points_earned
## 
## attempt | Predicted |     95% CI
## --------------------------------
##       0 |      0.67 | 0.53, 0.56
##      10 |      0.60 | 0.51, 0.53
##      15 |      0.57 | 0.50, 0.53
##      25 |      0.53 | 0.49, 0.52
##      35 |      0.50 | 0.48, 0.52
##      45 |      0.48 | 0.47, 0.52
##      55 |      0.45 | 0.44, 0.52
##      70 |      0.37 | 0.36, 0.51
## 
## Adjusted for:
## *    chapter =                                                  5.00
## *       book = College / Advanced Statistics and Data Science (ABCD)
## *   class_id =                                  0 (population-level)
## * student_id =                                  0 (population-level)
## 
## attr(,"class")
## [1] "ggalleffects" "list"        
## attr(,"model.name")
## [1] "."

3.3 RQ3: Does % correct on previous chapter predict % correct on current chapter?

# quick check of correlation between current- and previous-chapter scores
df.summary %>% 
  select(score, score_prev_chapter) %>% 
  cor(use = "pairwise.complete.obs")
##                        score score_prev_chapter
## score              1.0000000          0.7184284
## score_prev_chapter 0.7184284          1.0000000
# mixed effects logistic regression: add previous-chapter score as predictor
lm.model3 <-
  glmer(points_earned ~
          1 +
          # fixed effect for chapter, book
          chapter +
          book +
          # fixed effect for prev chapter score
          score_prev_chapter +
          # random intercept for class and student
          (1 | class_id) +
          (1 | student_id),
        family = binomial(link = "logit"),
        data = df.compare)

# summary
lm.model3 %>%
  summary()
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## points_earned ~ 1 + chapter + book + score_prev_chapter + (1 |  
##     class_id) + (1 | student_id)
##    Data: df.compare
## 
##      AIC      BIC   logLik deviance df.resid 
## 178446.6 178507.6 -89217.3 178434.6   193306 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.0363  0.2201  0.3860  0.5249  2.2112 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  student_id (Intercept) 0.581753 0.76273 
##  class_id   (Intercept) 0.009534 0.09764 
## Number of obs: 193312, groups:  student_id, 560; class_id, 5
## 
## Fixed effects:
##                                                  Estimate Std. Error
## (Intercept)                                      3.135537   0.081804
## chapter                                         -0.177235   0.002511
## bookCollege / Statistics and Data Science (ABC)  0.132217   0.113242
## score_prev_chapter                              -1.009659   0.037101
##                                                 z value Pr(>|z|)    
## (Intercept)                                      38.330   <2e-16 ***
## chapter                                         -70.593   <2e-16 ***
## bookCollege / Statistics and Data Science (ABC)   1.168    0.243    
## score_prev_chapter                              -27.214   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) chaptr b/SaDS(
## chapter     -0.240               
## bC/SaDS(ABC -0.586 -0.010        
## scr_prv_chp -0.381  0.215 -0.013
# p values: joint (Type III-style) Wald tests for each fixed-effect term
joint_tests(lm.model3)
##  model term         df1 df2  F.ratio    Chisq p.value
##  chapter              1 Inf 4983.325 4983.325  <.0001
##  book                 1 Inf    1.363    1.363  0.2430
##  score_prev_chapter   1 Inf  740.586  740.586  <.0001
# Calculate estimates: marginal predicted probabilities for each fixed effect.
# bias_correction adjusts the back-transformation from the logit scale for the
# random-effect variance. Use TRUE, not T (T is a reassignable binding).
lm.model3 %>%
  ggpredict(bias_correction = TRUE)
## $chapter
## # Predicted probabilities of points_earned
## 
## chapter | Predicted |     95% CI
## --------------------------------
##       1 |      0.78 | 0.60, 0.65
##       2 |      0.75 | 0.58, 0.62
##       3 |      0.72 | 0.56, 0.59
##       4 |      0.69 | 0.54, 0.57
##       5 |      0.66 | 0.53, 0.56
##       6 |      0.63 | 0.52, 0.54
##       7 |      0.60 | 0.51, 0.53
##       9 |      0.55 | 0.50, 0.52
## 
## Adjusted for:
## *               book = College / Advanced Statistics and Data Science (ABCD)
## * score_prev_chapter =                                                  0.76
## *           class_id =                                  0 (population-level)
## *         student_id =                                  0 (population-level)
## 
## $book
## # Predicted probabilities of points_earned
## 
## book                                                  | Predicted |     95% CI
## ------------------------------------------------------------------------------
## College / Advanced Statistics and Data Science (ABCD) |      0.66 | 0.53, 0.56
## College / Statistics and Data Science (ABC)           |      0.68 | 0.54, 0.57
## 
## Adjusted for:
## *            chapter = 5.00
## * score_prev_chapter = 0.76
## *           class_id = 0 (population-level)
## *         student_id = 0 (population-level)
## 
## $score_prev_chapter
## # Predicted probabilities of points_earned
## 
## score_prev_chapter | Predicted |     95% CI
## -------------------------------------------
##               0.00 |      0.79 | 0.61, 0.66
##               0.20 |      0.76 | 0.58, 0.63
##               0.40 |      0.72 | 0.56, 0.60
##               0.60 |      0.69 | 0.54, 0.57
##               0.80 |      0.65 | 0.53, 0.55
##               1.00 |      0.62 | 0.52, 0.54
## 
## Adjusted for:
## *    chapter =                                                  5.00
## *       book = College / Advanced Statistics and Data Science (ABCD)
## *   class_id =                                  0 (population-level)
## * student_id =                                  0 (population-level)
## 
## attr(,"class")
## [1] "ggalleffects" "list"        
## attr(,"model.name")
## [1] "."

3.4 Combined Model

# Combined (full) model: logistic mixed-effects model predicting whether a
# response earned points, pooling the predictors from all three RQs.
lm.model4 =
  glmer(points_earned ~
          1 +
          # RQ1: fixed effect for chapter, book
          chapter +
          book +
          # RQ2: fixed effect for student engagement (attempts)
          attempt +
          # RQ3: fixed effect for prev chapter score; prev_missing is the
          # missingness indicator for that score (per the held-out
          # preprocessing below, 1 = no previous chapter, score filled
          # with 0 — confirm the training data was built the same way)
          score_prev_chapter +
          prev_missing +
          # random intercept for class and student
          # (responses nested within students and classes)
          (1 | class_id) +
          (1 | student_id),
        # binomial family, logit link: points_earned modeled as 0/1
        family = binomial(link = "logit"),
        data = df.compare)
# summary of fixed and random effects for the full model
summary(lm.model4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## points_earned ~ 1 + chapter + book + attempt + score_prev_chapter +  
##     prev_missing + (1 | class_id) + (1 | student_id)
##    Data: df.compare
## 
##      AIC      BIC   logLik deviance df.resid 
## 177142.2 177223.6 -88563.1 177126.2   193304 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.8232   0.1614   0.3908   0.5187   2.3133 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  student_id (Intercept) 0.328508 0.57316 
##  class_id   (Intercept) 0.006215 0.07884 
## Number of obs: 193312, groups:  student_id, 560; class_id, 5
## 
## Fixed effects:
##                                                  Estimate Std. Error
## (Intercept)                                      1.176616   0.088683
## chapter                                         -0.103020   0.003307
## bookCollege / Statistics and Data Science (ABC)  0.111500   0.089329
## attempt                                         -0.042295   0.005959
## score_prev_chapter                               0.930898   0.067236
## prev_missing                                     2.762579   0.082018
##                                                 z value Pr(>|z|)    
## (Intercept)                                      13.268  < 2e-16 ***
## chapter                                         -31.148  < 2e-16 ***
## bookCollege / Statistics and Data Science (ABC)   1.248    0.212    
## attempt                                          -7.098 1.27e-12 ***
## score_prev_chapter                               13.845  < 2e-16 ***
## prev_missing                                     33.683  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) chaptr b/SaDS( attmpt scr_p_
## chapter     -0.586                             
## bC/SaDS(ABC -0.411 -0.019                      
## attempt     -0.071  0.039  0.003               
## scr_prv_chp -0.727  0.628 -0.022  -0.031       
## prev_missng -0.556  0.562 -0.010  -0.023  0.720
# p values: joint (Type III-style) Wald tests for each fixed-effect term
joint_tests(lm.model4)
##  model term         df1 df2  F.ratio    Chisq p.value
##  chapter              1 Inf  970.208  970.208  <.0001
##  book                 1 Inf    1.558    1.558  0.2120
##  attempt              1 Inf   50.375   50.375  <.0001
##  score_prev_chapter   1 Inf  191.692  191.692  <.0001
##  prev_missing         1 Inf 1134.516 1134.516  <.0001
# Calculate estimates: marginal predicted probabilities for each fixed effect.
# bias_correction adjusts the back-transformation from the logit scale for the
# random-effect variance. Use TRUE, not T (T is a reassignable binding).
lm.model4 %>%
  ggpredict(bias_correction = TRUE)
## $chapter
## # Predicted probabilities of points_earned
## 
## chapter | Predicted |     95% CI
## --------------------------------
##       1 |      0.74 | 0.57, 0.60
##       2 |      0.72 | 0.56, 0.59
##       3 |      0.70 | 0.55, 0.58
##       4 |      0.68 | 0.54, 0.57
##       5 |      0.67 | 0.53, 0.56
##       6 |      0.65 | 0.53, 0.55
##       7 |      0.63 | 0.52, 0.54
##       9 |      0.60 | 0.51, 0.53
## 
## Adjusted for:
## *               book = College / Advanced Statistics and Data Science (ABCD)
## *            attempt =                                                  1.00
## * score_prev_chapter =                                                  0.76
## *       prev_missing =                                                  0.07
## *           class_id =                                  0 (population-level)
## *         student_id =                                  0 (population-level)
## 
## $book
## # Predicted probabilities of points_earned
## 
## book                                                  | Predicted |     95% CI
## ------------------------------------------------------------------------------
## College / Advanced Statistics and Data Science (ABCD) |      0.67 | 0.53, 0.56
## College / Statistics and Data Science (ABC)           |      0.69 | 0.54, 0.57
## 
## Adjusted for:
## *            chapter = 5.00
## *            attempt = 1.00
## * score_prev_chapter = 0.76
## *       prev_missing = 0.07
## *           class_id = 0 (population-level)
## *         student_id = 0 (population-level)
## 
## $attempt
## # Predicted probabilities of points_earned
## 
## attempt | Predicted |     95% CI
## --------------------------------
##       0 |      0.67 | 0.54, 0.56
##      10 |      0.60 | 0.51, 0.53
##      15 |      0.57 | 0.50, 0.53
##      25 |      0.53 | 0.49, 0.52
##      35 |      0.50 | 0.48, 0.52
##      45 |      0.48 | 0.46, 0.52
##      55 |      0.45 | 0.44, 0.52
##      70 |      0.36 | 0.36, 0.51
## 
## Adjusted for:
## *            chapter =                                                  5.00
## *               book = College / Advanced Statistics and Data Science (ABCD)
## * score_prev_chapter =                                                  0.76
## *       prev_missing =                                                  0.07
## *           class_id =                                  0 (population-level)
## *         student_id =                                  0 (population-level)
## 
## $score_prev_chapter
## # Predicted probabilities of points_earned
## 
## score_prev_chapter | Predicted |     95% CI
## -------------------------------------------
##               0.00 |      0.56 | 0.50, 0.52
##               0.20 |      0.58 | 0.51, 0.52
##               0.40 |      0.61 | 0.52, 0.53
##               0.60 |      0.64 | 0.52, 0.54
##               0.80 |      0.67 | 0.54, 0.56
##               1.00 |      0.70 | 0.55, 0.58
## 
## Adjusted for:
## *      chapter =                                                  5.00
## *         book = College / Advanced Statistics and Data Science (ABCD)
## *      attempt =                                                  1.00
## * prev_missing =                                                  0.07
## *     class_id =                                  0 (population-level)
## *   student_id =                                  0 (population-level)
## 
## $prev_missing
## # Predicted probabilities of points_earned
## 
## prev_missing | Predicted |     95% CI
## -------------------------------------
##            0 |      0.63 | 0.52, 0.54
##            1 |      0.96 | 0.88, 0.91
## 
## Adjusted for:
## *            chapter =                                                  5.00
## *               book = College / Advanced Statistics and Data Science (ABCD)
## *            attempt =                                                  1.00
## * score_prev_chapter =                                                  0.76
## *           class_id =                                  0 (population-level)
## *         student_id =                                  0 (population-level)
## 
## attr(,"class")
## [1] "ggalleffects" "list"        
## attr(,"model.name")
## [1] "."

3.4.1 Comparison

# named list of the candidate models, simplest first, full model last
models = setNames(list(lm.model1, lm.model2, lm.model3, lm.model4),
                  c("Model 1", "Model 2", "Model 3", "Model 4 (Full)"))

# compute AIC and BIC
# NOTE: AIC_wt / BIC_wt are Akaike / Schwarz *weights* (higher = better
# relative support), relabelled "AIC"/"BIC" here for the downstream table
aic_bic =
  performance::compare_performance(models,
                                   metrics = c("AIC", "BIC"),
                                   rank = TRUE) %>%
  rename(Model = Name, AIC = AIC_wt, BIC = BIC_wt) %>%
  select(Model, AIC, BIC)

# compute RMSE
# Root-mean-squared error of a model's response-scale predictions against an
# observed outcome column (default "points_earned", matching df.compare).
# Rows where either the prediction or the outcome is NA are dropped; if no
# complete pairs remain, returns NA with a warning instead of a silent NaN.
get_rmse =
  function(model, data, outcome = "points_earned") {
    # response-scale predictions; na.exclude keeps the prediction vector
    # aligned row-for-row with `data`
    pred = predict(model, newdata = data, type = "response",
                   na.action = na.exclude)
    obs = data[[outcome]]
    # keep only rows with both a prediction and an observed outcome
    ok = !is.na(pred) & !is.na(obs)
    if (!any(ok)) {
      warning("no complete prediction/outcome pairs; returning NA",
              call. = FALSE)
      return(NA_real_)
    }
    sqrt(mean((obs[ok] - pred[ok])^2))
  }

# one RMSE per candidate model, evaluated on the training data
rmse_tbl =
  tibble(
    Model = names(models),
    RMSE  = map_dbl(models, ~ get_rmse(.x, data = df.compare))
  )

# merge the AIC/BIC weights with RMSE into one comparison table
df.metrics =
  left_join(aic_bic, rmse_tbl, by = "Model") %>%
  arrange(AIC) %>%
  # dplyr >= 1.1 deprecates passing extra args to across() via `...`;
  # wrap round() in a purrr-style lambda instead
  mutate(across(where(is.numeric), ~ round(.x, digits = 5)))

# NOTE(review): View() is interactive-only and is a no-op (or errors) when
# knitting; consider printing the table instead, e.g. knitr::kable(df.metrics)
View(df.metrics)

4 Predicting the Held-Out Data

We have a held-out 20% of data that contains entirely new items. We now have the models above make predictions for that held-out dataset (predicting proportion correct for these new items).

# # Import held out data
# df.heldout =
#   read.csv(paste0(data_dir, "filtered20_responses_2023.csv"))
# 
# df.heldout =
#   df.heldout %>%
#   group_by(book, release,
#            institution_id, class_id,
#            student_id,
#            chapter_num,
#            item_id) %>%
#   # count only last attempt
#   slice_max(order_by = attempt,
#             n = 1,
#             with_ties = F) %>% 
#   ungroup() %>%
#   # keep only relevant cols
#   select(c(book, release,
#            institution_id, class_id,
#            student_id,
#            chapter = chapter_num, item_id,
#            points_possible, points_earned, attempt))
# 
# # compute engagement
# df.summary_heldout =
#   df.heldout %>%
#   # group % correct by book, release, institution, class, student, and chapter
#   group_by(book, release,
#            institution_id, class_id, student_id,
#            chapter) %>%
#   # calculate % correct across chapter
#   summarise(score = mean(points_earned, na.rm = T),
#             # calculate average number of attempts across chapter
#             no_attempts = mean(attempt, na.rm = T),
#             .groups = "drop_last") %>%
#   arrange(book, release,
#           institution_id, class_id, student_id,
#           chapter) %>% 
#   group_by(book, release, institution_id, class_id, student_id) %>% 
#   # add column for % correct on preceding chapter
#   mutate(score_prev_chapter = lag(score)) %>%   # NA for the first chapter
#   ungroup()
# 
# df.heldout =
#   df.heldout %>% 
#   left_join(df.summary_heldout %>% 
#               select(student_id, chapter, score_prev_chapter),
#             by = c("student_id", "chapter")) %>%
#   select(points_earned, chapter, book,
#          attempt, score_prev_chapter,
#          class_id, student_id) %>%
#   mutate(across(c(class_id, book, student_id),
#                 factor)) %>%
#   # create binary value marking missing values for prev chapter scores
#   mutate(prev_missing = if_else(is.na(score_prev_chapter), 1, 0),
#          # fill first chapter values with 0
#          score_prev_chapter = replace_na(score_prev_chapter, 0)) %>%
#   drop_na()
# # predict held out students
# df.predict =
#   df.heldout %>% 
#   mutate(pred = predict(lm.model4, newdata = ., type = "response",
#                         re.form = NA, # fixed-effects only
#                         allow.new.levels = TRUE))
# 
# # aggregate to one row per student
# df.submit =
#   df.predict %>% 
#   group_by(id = student_id) %>% 
#   summarise(score = mean(pred), .groups = "drop")
# 
# print(df.submit)